################################################################################
# Evaluation of fine-tuning a hate speech classifier with different
# annotation datasets (we use gpt-4o-mini-2024-07-18).
################################################################################
################################################################################
# Libraries
################################################################################
library(dplyr)
library(tidyr)
library(readr)
library(pbmcapply)
library(stringr)
library(tidymodels)
library(caret)
library(lubridate)
library(ggplot2)
library(ggthemes)
library(scales)
library(irr)
library(tidycomm)
library(forestmangr)
library(ggcorrplot)
library(RColorBrewer)
library(cvms)
library(cowplot)
library(binom)
library(boot)
library(xtable)
library(stargazer)
################################################################################
# Setup
################################################################################
# Start from an empty global environment so the results do not depend on
# objects left over from an earlier session.
rm(list = ls())

# All file paths below are relative to the directory containing this script,
# so make that directory the working directory.
#   * Inside RStudio: use the path of the active (saved) document.
#   * Under `Rscript path/to/script.R`: use the `--file=` command-line argument.
#   * Otherwise: keep the current working directory and warn, instead of
#     failing on an unavailable RStudio API or an empty document path.
# `local()` keeps the helper variables out of the global environment.
local({
  script_dir <- NULL
  if (requireNamespace("rstudioapi", quietly = TRUE) &&
      rstudioapi::isAvailable()) {
    active_doc_path <- rstudioapi::getActiveDocumentContext()$path
    # The path is "" for a document that has never been saved.
    if (nzchar(active_doc_path)) {
      script_dir <- dirname(active_doc_path)
    }
  } else {
    file_arg <- grep("^--file=", commandArgs(trailingOnly = FALSE), value = TRUE)
    if (length(file_arg) > 0) {
      script_path <- sub("^--file=", "", file_arg[1])
      # Rscript encodes spaces in the script path as "~+~".
      script_path <- gsub("~+~", " ", script_path, fixed = TRUE)
      script_dir <- dirname(normalizePath(script_path))
    }
  }

  if (is.null(script_dir)) {
    warning(
      "Could not determine the script location; keeping working directory: ",
      getwd(),
      call. = FALSE
    )
  } else {
    setwd(script_dir)
  }
})

parent_path <- getwd()
getwd()

# Fixed seed so the random sample drawn further down is reproducible.
set.seed(123456789)
################################################################################
# Load Data 
################################################################################
# Read the table of all classifications; the path is relative to the working
# directory set in the setup section.
df <- "../tables/table_all_classifications_2_classes.csv" %>%
  read_csv()


################################################################################
# Filtering
################################################################################
# Drop the expert rows whose fine_tune value is 100 or 250; every other row is
# kept.
df <- filter(df, group != "experts" | !(fine_tune %in% c(100, 250)))

# Gold standard: the expert rows with fine_tune == 0. Only the item keys and
# the label are kept; the label is renamed so it can sit next to each
# classifier's own `hatespeech` column after a join.
gold <- df %>%
  filter(group == "experts" & fine_tune == 0) %>%
  select(articleid, id, hatespeech) %>%
  rename(gold_label = hatespeech)

# Show which groups remain after filtering.
unique(df[["group"]])

# Draw example rows for each of three classes from the expert rows:
#   "hate speech": hatespeech == 1 and a target is recorded
#   "toxic":       hatespeech == 1 but target_combined is missing
#   "neither":     all remaining rows with a non-missing hatespeech value
# Rows with a missing hatespeech value get a missing class.
samples_classes <- df %>%
  filter(group == "experts") %>%
  mutate(
    target = ifelse(
      hatespeech == 1,
      ifelse(is.na(target_combined), "toxic", "hate speech"),
      "neither"
    )
  )

# Up to 25 random rows per class, written out for use in the text.
samples_out <- slice_sample(samples_classes, n = 25, by = target)
write_csv(samples_out, "../tables/samples_for_text.csv")
################################################################################
# Join gold back onto all other runs
################################################################################
# Attach the gold label to every classification row, excluding the gold rows
# themselves (experts with fine_tune == 0), and mark each row as a false
# positive and/or false negative relative to the gold label.
df_joined <- df %>%
  # the gold rows are the reference, not a classifier to evaluate
  filter(group != "experts" | fine_tune != 0) %>%
  left_join(gold, by = join_by(articleid, id)) %>%
  mutate(
    # predicted hate speech, gold says not
    fp = as.integer(hatespeech == 1 & gold_label == 0),
    # predicted not hate speech, gold says it is
    fn = as.integer(hatespeech == 0 & gold_label == 1)
  )

# One-row table with the number of gold negatives (label 0) and gold
# positives (label 1).
gold_counts <- tibble(
  total_neg = sum(gold$gold_label == 0),
  total_pos = sum(gold$gold_label == 1)
)
################################################################################
# Summarize overall FP/FN by classifier
################################################################################
# Per-classifier (group x fine_tune) error summary.
#   n        number of classified rows in the cell
#   FP / FN  counts of false positives / false negatives
#   FP_rate  FP as a percentage of the gold negatives in the cell
#   FN_rate  FN as a percentage of the gold positives in the cell
#
# The denominators are counted inside each cell rather than taken from the
# global gold totals. Both agree when a cell contains every gold item exactly
# once. The per-cell count stays correct when a classifier was evaluated on
# only part of the gold items, or has several rows per item.
# `na.rm = TRUE` keeps a single row with a missing prediction or missing gold
# label from turning the whole summary row into NA. Such rows are counted as
# neither FP nor FN.
error_summary <- df_joined %>%
  group_by(group, fine_tune) %>%
  summarize(
    n = n(),
    FP = sum(fp, na.rm = TRUE),
    FN = sum(fn, na.rm = TRUE),
    FP_rate = FP / sum(gold_label == 0, na.rm = TRUE) * 100,
    FN_rate = FN / sum(gold_label == 1, na.rm = TRUE) * 100,
    .groups = "drop"
  )

################################################################################
# Breakdown of FP & FN counts by target_combined
################################################################################
# For each classifier and error type, count the misclassified rows per
# target_combined value and express each count as a percentage of that
# classifier's errors of the same type.
error_by_target <- df_joined %>%
  filter(fp == 1 | fn == 1) %>%
  mutate(error_type = if_else(fp == 1, "FP", "FN")) %>%
  count(group, fine_tune, error_type, target_combined, name = "n_errors") %>%
  mutate(
    prop_within_error = n_errors / sum(n_errors) * 100,
    .by = c(group, fine_tune, error_type)
  ) %>%
  # false negatives without a recorded target are reported as "toxic"
  mutate(
    target_combined = replace(
      target_combined,
      error_type == "FN" & is.na(target_combined),
      "toxic"
    )
  )

################################################################################
# Filtering of groups of interest!
################################################################################
# Restrict both result tables to runs with a non-zero fine_tune value and
# leave out the "chatGPT" group.
error_by_target <- filter(error_by_target, fine_tune != 0, group != "chatGPT")
error_summary <- filter(error_summary, fine_tune != 0, group != "chatGPT")

# Print the summary with the mean FP and FN rate across groups added for each
# fine_tune value (shown only, not stored).
error_summary %>%
  group_by(fine_tune) %>%
  mutate(
    mean_FP = mean(FP_rate),
    mean_FN = mean(FN_rate)
  )
################################################################################
# Nice Tables
################################################################################
# ------------------------------------------------------------------------------
# Error summary table: LaTeX + HTML
# ------------------------------------------------------------------------------
# Write one result table as both LaTeX (.tex) and HTML (.html) into ../tables.
#
# tbl        data frame or tibble to print verbatim (no summary statistics)
# file_stem  output file name without extension
# title      table caption
# label      table label used for cross-referencing
#
# The table is converted to a plain data.frame first: stargazer is reported to
# mishandle tibbles and the commonly documented workaround is
# `as.data.frame()`. The conversion is a no-op for plain data frames.
# Returns `tbl` invisibly.
write_error_table <- function(tbl, file_stem, title, label) {
  plain_df <- as.data.frame(tbl)
  # stargazer output type -> file extension
  extensions <- c(latex = "tex", html = "html")

  for (fmt in names(extensions)) {
    stargazer(
      plain_df,
      type     = fmt,
      out      = paste0("../tables/", file_stem, ".", extensions[[fmt]]),
      title    = title,
      label    = label,
      summary  = FALSE,
      rownames = FALSE,
      digits   = 2
    )
  }

  invisible(tbl)
}

# ------------------------------------------------------------------------------
# Error summary table: LaTeX + HTML
# ------------------------------------------------------------------------------
write_error_table(
  error_summary,
  file_stem = "error_summary",
  title     = "False Positive and False Negative Rates by Classifier",
  label     = "tab:error_summary"
)

# ------------------------------------------------------------------------------
# Error-by-target table: LaTeX + HTML
# ------------------------------------------------------------------------------
write_error_table(
  error_by_target,
  file_stem = "error_by_target",
  title     = "Error Counts and Proportions by Classifier and Target Group",
  label     = "tab:error_by_target"
)

################################################################################
# Extract FN and FP cases into a csv
################################################################################
# Collect every misclassified row (false positive or false negative), tag it
# with its error type, and export the result for manual inspection.
errors_all <- df_joined %>%
  # a row that is not a false positive and survives the filter below is
  # necessarily a false negative
  mutate(error_type = if_else(fp %in% 1, "FP", "FN")) %>%
  filter(fp == 1 | fn == 1)

write_csv(errors_all, "../tables/errors_all_fp_fn.csv")
